/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * A class which represents a fragment of text (eg inside a text
 * node); if only codepoints below 256 are used, the text is stored as
 * a char*; otherwise the text is stored as a char16_t*
 */

#include "nsTextFragment.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
#include "nsMemory.h"
#include "nsBidiUtils.h"
#include "nsUnicharUtils.h"
#include "nsUTF8Utils.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/SSE.h"
#include "nsTextFragmentImpl.h"
#include <algorithm>

// Number of trailing space/tab characters stored in each shared string.
#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
// Largest run of newlines a shared string can represent.
#define TEXTFRAG_MAX_NEWLINES 7

// Static buffer used for common fragments
//
// Entry [n] of each table is laid out by Init() as:
//   one ' ', then n '\n' characters, then TEXTFRAG_WHITE_AFTER_NEWLINE
//   spaces (sSpaceSharedString) or tabs (sTabSharedString).
// The buffers are not NUL-terminated.
static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
// sSingleCharSharedString[c] == c for every 8-bit value c.
static char sSingleCharSharedString[256];

using mozilla::CheckedUint32;

/**
 * Allocates and fills the shared whitespace tables and the single-character
 * table declared above.
 *
 * @return NS_OK unconditionally.
 */
// static
nsresult
nsTextFragment::Init()
{
  // Build one space-flavoured and one tab-flavoured buffer per newline count.
  for (uint32_t newlineCount = 0; newlineCount <= TEXTFRAG_MAX_NEWLINES;
       ++newlineCount) {
    const uint32_t totalChars =
      1 + newlineCount + TEXTFRAG_WHITE_AFTER_NEWLINE;

    char* spaceRun = sSpaceSharedString[newlineCount] = new char[totalChars];
    char* tabRun = sTabSharedString[newlineCount] = new char[totalChars];

    // Both flavours start with a single space.
    spaceRun[0] = ' ';
    tabRun[0] = ' ';

    // Then the run of newlines...
    uint32_t pos = 1;
    while (pos < 1 + newlineCount) {
      spaceRun[pos] = '\n';
      tabRun[pos] = '\n';
      ++pos;
    }

    // ...followed by the trailing whitespace that distinguishes the tables.
    while (pos < totalChars) {
      spaceRun[pos] = ' ';
      tabRun[pos] = '\t';
      ++pos;
    }
  }

  // Identity table used for one-character 8-bit fragments.
  for (uint32_t code = 0; code < 256; ++code) {
    sSingleCharSharedString[code] = code;
  }

  return NS_OK;
}
staticvoidnsTextFragment::Shutdown(){uint32_ti;for(i=0;i<=TEXTFRAG_MAX_NEWLINES;++i){delete[]sSpaceSharedString[i];delete[]sTabSharedString[i];sSpaceSharedString[i]=nullptr;sTabSharedString[i]=nullptr;}}nsTextFragment::~nsTextFragment(){ReleaseText();MOZ_COUNT_DTOR(nsTextFragment);}voidnsTextFragment::ReleaseText(){if(mState.mLength&&m1b&&mState.mInHeap){free(m2b);// m1b == m2b as far as free is concerned}m1b=nullptr;mState.mIsBidi=false;// Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;mAllBits=0;}nsTextFragment&nsTextFragment::operator=(constnsTextFragment&aOther){ReleaseText();if(aOther.mState.mLength){if(!aOther.mState.mInHeap){m1b=aOther.m1b;// This will work even if aOther is using m2b}else{CheckedUint32m2bSize=aOther.mState.mLength;m2bSize*=(aOther.mState.mIs2b?sizeof(char16_t):sizeof(char));m2b=nullptr;if(m2bSize.isValid()){m2b=static_cast<char16_t*>(malloc(m2bSize.value()));}if(m2b){memcpy(m2b,aOther.m2b,m2bSize.value());}else{// allocate a buffer for a single REPLACEMENT CHARACTERm2b=static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));m2b[0]=0xFFFD;// REPLACEMENT CHARACTERmState.mIs2b=true;mState.mInHeap=true;mState.mLength=1;}}if(m1b){mAllBits=aOther.mAllBits;}}return*this;}staticinlineint32_tFirstNon8BitUnvectorized(constchar16_t*str,constchar16_t*end){typedefNon8BitParameters<sizeof(size_t)>p;constsize_tmask=p::mask();constuint32_talignMask=p::alignMask();constuint32_tnumUnicharsPerWord=p::numUnicharsPerWord();constint32_tlen=end-str;int32_ti=0;// Align ourselves to a word boundary.int32_talignLen=std::min(len,int32_t(((-NS_PTR_TO_INT32(str))&alignMask)/sizeof(char16_t)));for(;i<alignLen;i++){if(str[i]>255)returni;}// Check one word at a time.constint32_twordWalkEnd=((len-i)/numUnicharsPerWord)*numUnicharsPerWord;for(;i<wordWalkEnd;i+=numUnicharsPerWord){constsize_tword=*reinterpret_cast<constsize_t*>(str+i);if(word&mask)returni;}// Take care of the remainder one character at a 
time.for(;i<len;i++){if(str[i]>255)returni;}return-1;}#ifdef MOZILLA_MAY_SUPPORT_SSE2namespacemozilla{namespaceSSE2{int32_tFirstNon8Bit(constchar16_t*str,constchar16_t*end);}// namespace SSE2}// namespace mozilla#endif/* * This function returns -1 if all characters in str are 8 bit characters. * Otherwise, it returns a value less than or equal to the index of the first * non-8bit character in str. For example, if first non-8bit character is at * position 25, it may return 25, or for example 24, or 16. But it guarantees * there is no non-8bit character before returned value. */staticinlineint32_tFirstNon8Bit(constchar16_t*str,constchar16_t*end){#ifdef MOZILLA_MAY_SUPPORT_SSE2if(mozilla::supports_sse2()){returnmozilla::SSE2::FirstNon8Bit(str,end);}#endifreturnFirstNon8BitUnvectorized(str,end);}boolnsTextFragment::SetTo(constchar16_t*aBuffer,int32_taLength,boolaUpdateBidi){ReleaseText();if(aLength==0){returntrue;}char16_tfirstChar=*aBuffer;if(aLength==1&&firstChar<256){m1b=sSingleCharSharedString+firstChar;mState.mInHeap=false;mState.mIs2b=false;mState.mLength=1;returntrue;}constchar16_t*ucp=aBuffer;constchar16_t*uend=aBuffer+aLength;// Check if we can use a shared stringif(aLength<=1+TEXTFRAG_WHITE_AFTER_NEWLINE+TEXTFRAG_MAX_NEWLINES&&(firstChar==' '||firstChar=='\n'||firstChar=='\t')){if(firstChar==' '){++ucp;}constchar16_t*start=ucp;while(ucp<uend&&*ucp=='\n'){++ucp;}constchar16_t*endNewLine=ucp;char16_tspace=ucp<uend&&*ucp=='\t'?'\t':' ';while(ucp<uend&&*ucp==space){++ucp;}if(ucp==uend&&endNewLine-start<=TEXTFRAG_MAX_NEWLINES&&ucp-endNewLine<=TEXTFRAG_WHITE_AFTER_NEWLINE){char**strings=space==' '?sSpaceSharedString:sTabSharedString;m1b=strings[endNewLine-start];// If we didn't find a space in the beginning, skip it now.if(firstChar!=' '){++m1b;}mState.mInHeap=false;mState.mIs2b=false;mState.mLength=aLength;returntrue;}}// See if we need to store the data in ucs2 or notint32_tfirst16bit=FirstNon8Bit(ucp,uend);if(first16bit!=-1){// aBuffer contains no non-8bit 
character// Use ucs2 storage because we have toCheckedUint32m2bSize=aLength;m2bSize*=sizeof(char16_t);if(!m2bSize.isValid()){returnfalse;}m2b=static_cast<char16_t*>(malloc(m2bSize.value()));if(!m2b){returnfalse;}memcpy(m2b,aBuffer,m2bSize.value());mState.mIs2b=true;if(aUpdateBidi){UpdateBidiFlag(aBuffer+first16bit,aLength-first16bit);}}else{// Use 1 byte storage because we canchar*buff=static_cast<char*>(malloc(aLength));if(!buff){returnfalse;}// Copy dataLossyConvertEncoding16to8converter(buff);copy_string(aBuffer,aBuffer+aLength,converter);m1b=buff;mState.mIs2b=false;}// Setup our fieldsmState.mInHeap=true;mState.mLength=aLength;returntrue;}voidnsTextFragment::CopyTo(char16_t*aDest,int32_taOffset,int32_taCount){NS_ASSERTION(aOffset>=0,"Bad offset passed to nsTextFragment::CopyTo()!");NS_ASSERTION(aCount>=0,"Bad count passed to nsTextFragment::CopyTo()!");if(aOffset<0){aOffset=0;}if(uint32_t(aOffset+aCount)>GetLength()){aCount=mState.mLength-aOffset;}if(aCount!=0){if(mState.mIs2b){memcpy(aDest,m2b+aOffset,sizeof(char16_t)*aCount);}else{constchar*cp=m1b+aOffset;constchar*end=cp+aCount;LossyConvertEncoding8to16converter(aDest);copy_string(cp,end,converter);}}}boolnsTextFragment::Append(constchar16_t*aBuffer,uint32_taLength,boolaUpdateBidi){// This is a common case because some callsites create a textnode// with a value by creating the node and then calling AppendData.if(mState.mLength==0){returnSetTo(aBuffer,aLength,aUpdateBidi);}// Should we optimize for aData.Length() == 0?CheckedUint32length=mState.mLength;length+=aLength;if(!length.isValid()){returnfalse;}if(mState.mIs2b){length*=sizeof(char16_t);if(!length.isValid()){returnfalse;}// Already a 2-byte string so the result will be toochar16_t*buff=static_cast<char16_t*>(realloc(m2b,length.value()));if(!buff){returnfalse;}memcpy(buff+mState.mLength,aBuffer,aLength*sizeof(char16_t));mState.mLength+=aLength;m2b=buff;if(aUpdateBidi){UpdateBidiFlag(aBuffer,aLength);}returntrue;}// Current string is a 1-byte string, 
check if the new data fits in one byte too.int32_tfirst16bit=FirstNon8Bit(aBuffer,aBuffer+aLength);if(first16bit!=-1){// aBuffer contains no non-8bit characterlength*=sizeof(char16_t);if(!length.isValid()){returnfalse;}// The old data was 1-byte, but the new is not so we have to expand it// all to 2-bytechar16_t*buff=static_cast<char16_t*>(malloc(length.value()));if(!buff){returnfalse;}// Copy data into buffLossyConvertEncoding8to16converter(buff);copy_string(m1b,m1b+mState.mLength,converter);memcpy(buff+mState.mLength,aBuffer,aLength*sizeof(char16_t));mState.mLength+=aLength;mState.mIs2b=true;if(mState.mInHeap){free(m2b);}m2b=buff;mState.mInHeap=true;if(aUpdateBidi){UpdateBidiFlag(aBuffer+first16bit,aLength-first16bit);}returntrue;}// The new and the old data is all 1-bytechar*buff;if(mState.mInHeap){buff=static_cast<char*>(realloc(const_cast<char*>(m1b),length.value()));if(!buff){returnfalse;}}else{buff=static_cast<char*>(malloc(length.value()));if(!buff){returnfalse;}memcpy(buff,m1b,mState.mLength);mState.mInHeap=true;}// Copy aBuffer into buff.LossyConvertEncoding16to8converter(buff+mState.mLength);copy_string(aBuffer,aBuffer+aLength,converter);m1b=buff;mState.mLength+=aLength;returntrue;}/* virtual */size_tnsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOfaMallocSizeOf)const{if(Is2b()){returnaMallocSizeOf(m2b);}if(mState.mInHeap){returnaMallocSizeOf(m1b);}return0;}// To save time we only do this when we really want to know, not during// every allocationvoidnsTextFragment::UpdateBidiFlag(constchar16_t*aBuffer,uint32_taLength){if(mState.mIs2b&&!mState.mIsBidi){if(HasRTLChars(aBuffer,aLength)){mState.mIsBidi=true;}}}